/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.net.protocols.http;

import net.nutch.net.protocols.Response;

import java.io.EOFException;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.net.InetAddress;
import java.net.URL;

import java.util.logging.Level;
import java.util.logging.Logger;

import net.nutch.util.LogFormatter;
import net.nutch.util.NutchConf;

/**
 * A simple HTTP client.
 *
 * <p>Per-instance settings (proxy, timeout, content limit, agent
 * identification) are initialised from {@link NutchConf} when the instance
 * is created; some of them can be overridden afterwards through the
 * setters.
 */
public class Http {

  public static final Logger LOG =
    LogFormatter.getLogger("net.nutch.net.Http");

  private static final int DEFAULT_PORT = 80;
  private static final int CODE_OK = 200;

  static final int BUFFER_SIZE = 16384;

  /** Maximum number of redirects {@link #getResponse(URL)} will follow. */
  private static final int MAX_REDIRECTS = 5;

  /** Reserved value for HTTP version number, does not denote any version */
  public static final int HTTP_VER_NOTSET = -1;

  /** HTTP version 1.0 (the earliest version we use) */
  public static final int HTTP_VER_1_0 = 0;

  /** HTTP version 1.1 */
  public static final int HTTP_VER_1_1 = 1;

  /** Always indicates the latest HTTP version we support, currently 1.1 */
  public static final int HTTP_VER_LATEST;

  /** Proxy host, read from <code>http.proxy.host</code>. */
  String proxyHost = NutchConf.get("http.proxy.host");

  /** Proxy port, read from <code>http.proxy.port</code> (default 8080). */
  int proxyPort = NutchConf.getInt("http.proxy.port", 8080);

  /** True when a non-empty proxy host has been configured. */
  boolean proxyenabled = (proxyHost != null && proxyHost.length() > 0);

  /** Timeout, read from <code>http.timeout</code> (default 10000). */
  int timeout = NutchConf.getInt("http.timeout", 10000);

  /**
   * Point at which content is truncated, read from
   * <code>http.content.limit</code> (default 64 * 1024).
   */
  int maxContentLength = NutchConf.getInt("http.content.limit", 64 * 1024);

  /** Agent name, read from <code>http.agent.name</code>. */
  String agentString = NutchConf.get("http.agent.name");

  /** Return email address, read from <code>http.agent.email</code>. */
  private String agentEmail = NutchConf.get("http.agent.email");

  static {
    // HTTP/1.1 is used unless "http.version.1.1" is configured to false.
    if (NutchConf.getBoolean("http.version.1.1", true)) {
      HTTP_VER_LATEST = HTTP_VER_1_1;
    } else {
      HTTP_VER_LATEST = HTTP_VER_1_0;
    }
  }

  /**
   * Returns the HTTP version code which represents a lesser version
   * of HTTP, or HTTP_VER_NOTSET if both equal that value.
   *
   * <p>{@link #HTTP_VER_NOTSET} is never preferred over a real version:
   * if exactly one argument is unset, the other one is returned.
   *
   * @param ver1 first HTTP version code
   * @param ver2 second HTTP version code
   * @return the lesser of the two set versions, or
   *         <code>HTTP_VER_NOTSET</code> if neither is set
   */
  public static int minHttpVersion(int ver1, int ver2) {
    if (ver1 < ver2) {
      if (ver1 == HTTP_VER_NOTSET) {
        return ver2;
      }
      return ver1;
    }
    if (ver2 == HTTP_VER_NOTSET) {
      return ver1;
    }
    return ver2;
  }

  /** Set the timeout. */
  public void setTimeout(int timeout) {
    this.timeout = timeout;
  }

  /** Set the point at which content is truncated. */
  public void setMaxContentLength(int length) {
    this.maxContentLength = length;
  }

  /** Set the agent name. */
  public void setAgentString(String agentString) {
    this.agentString = agentString;
  }

  /** Set the return email address. */
  public void setAgentEmail(String agentEmail) {
    this.agentEmail = agentEmail;
  }

  /**
   * Make a single HTTP request and return its response, not following
   * redirects and not translating HTTP errors to exceptions.  If
   * <code>addr</code> is not null, that address will be used.  If
   * <code>httpAccounting</code> is not <code>null</code>, then its
   * fields will be updated during this request.  The request will be issued
   * using the HTTP version specified by <code>httpVersion</code>.
   */
  public Response getRawResponse(URL url, InetAddress addr,
                                 MiscHttpAccounting httpAccounting,
                                 int httpVersion)
    throws IOException, HttpException {
    return new HttpResponse(this, url, addr, httpAccounting, httpVersion);
  }

  /**
   * Returns the content of a URL.  Follows redirects and translates HTTP
   * errors to exceptions.
   *
   * <p>A response code of 200 is returned to the caller.  Codes in the
   * range 300-399 are treated as redirects; the <code>Location</code>
   * header is resolved against the URL that produced the redirect, so both
   * absolute and relative locations are accepted.  Any other code is
   * converted to an {@link HttpError}.
   *
   * @param url the URL to fetch
   * @return the first response with code 200
   * @throws IOException if a request fails or a redirect location cannot be
   *         parsed as a URL
   * @throws HttpException if more than <code>MAX_REDIRECTS</code> redirects
   *         are encountered, if a redirect carries no usable
   *         <code>Location</code> header, or if a non-redirect, non-200
   *         code is received
   */
  public Response getResponse(URL url) throws IOException, HttpException {

    int redirects = 0;
    URL target = url;

    while (true) {
      Response response = new HttpResponse(this, target); // make a request

      int code = response.getCode();

      if (code == CODE_OK) {                      // got a good response
        return response;                          // return it

      } else if (code >= 300 && code < 400) {     // handle redirect
        if (redirects == MAX_REDIRECTS) {
          throw new HttpException("Too many redirects: " + url);
        }

        String location = response.getHeader("Location");
        if (location == null || location.trim().length() == 0) {
          // Without a location there is nowhere to go; report it clearly
          // instead of failing inside the URL constructor.
          throw new HttpException("Redirect (" + code
                                  + ") without Location header: " + target);
        }

        // Resolve against the current target so relative redirects work.
        target = new URL(target, location);
        redirects++;
        LOG.fine("redirect to " + target);

      } else {                                    // convert to exception
        throw new HttpError(code);
      }
    }
  }

  /**
   * Reads one line from <code>in</code> into <code>line</code>, without the
   * line terminator.  Lines may be terminated by CR, LF or CR LF.
   *
   * <p>If <code>allowContinuedLine</code> is true and the line read so far
   * is not empty, a following line that starts with a space or a tab is
   * treated as a continuation: the terminator and that single leading
   * whitespace character are dropped and reading carries on into the same
   * buffer.
   *
   * @param in stream to read from
   * @param line buffer that receives the line; it is cleared first
   * @param allowContinuedLine whether continuation lines are joined
   * @return the length of the line that was read
   * @throws EOFException if the stream ends before a line terminator
   * @throws IOException if reading from the stream fails
   */
  static int readLine(PushbackInputStream in, StringBuffer line,
                      boolean allowContinuedLine)
    throws IOException {
    line.setLength(0);
    for (int c = in.read(); c != -1; c = in.read()) {
      switch (c) {
        case '\r':
          if (peek(in) == '\n') {
            in.read();                            // swallow the LF of CR LF
          }
          // intentional fall-through: CR alone also ends the line
        case '\n':
          if (line.length() > 0 && allowContinuedLine) {
            // at EOL -- check for continued line if the current
            // (possibly continued) line wasn't blank
            int next = peek(in);
            if (next == ' ' || next == '\t') {    // line is continued
              in.read();
              continue;                           // keep reading this line
            }
          }
          return line.length();                   // else complete
        default:
          line.append((char) c);
      }
    }
    throw new EOFException();
  }

  /**
   * Returns the next byte of <code>in</code> without consuming it, or -1 at
   * end of stream.
   *
   * <p>Nothing is pushed back at end of stream: <code>unread(-1)</code>
   * would push back the byte 0xFF and a later read would return 255
   * instead of reporting EOF.
   */
  private static int peek(PushbackInputStream in) throws IOException {
    int value = in.read();
    if (value != -1) {
      in.unread(value);
    }
    return value;
  }

  /**
   * For debugging.  Fetches a single URL and prints the response code, the
   * <code>Content-Type</code> and <code>Content-Length</code> headers and
   * the content.
   *
   * <p>Usage: <code>Http [-verbose] [-timeout N] url</code>.  The value
   * given to <code>-timeout</code> is multiplied by 1000 before being
   * passed to {@link #setTimeout(int)}.  Invalid command lines print the
   * usage string and exit with status -1.
   */
  public static void main(String[] args) throws Exception {
    int timeout = -1;
    boolean verbose = false;
    String urlString = null;

    String usage = "Usage: Http [-verbose] [-timeout N] url";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    for (int i = 0; i < args.length; i++) {       // parse command line
      if (args[i].equals("-timeout")) {           // found -timeout option
        if (i + 1 >= args.length) {               // option value is missing
          System.err.println(usage);
          System.exit(-1);
        }
        try {
          timeout = Integer.parseInt(args[++i]) * 1000;
        } catch (NumberFormatException e) {       // option value not a number
          System.err.println(usage);
          System.exit(-1);
        }
      } else if (args[i].equals("-verbose")) {    // found -verbose option
        verbose = true;
      } else if (i != args.length - 1) {
        System.err.println(usage);
        System.exit(-1);
      } else {                                    // url is required parameter
        urlString = args[i];
      }
    }

    if (urlString == null) {                      // no url was given
      System.err.println(usage);
      System.exit(-1);
    }

    Http http = new Http();

    if (timeout != -1) {                          // set timeout
      http.setTimeout(timeout);
    }

    // set log level
    if (verbose) {
      LOG.setLevel(Level.FINE);
    }

    Response response = http.getResponse(new URL(urlString));

    System.out.println("Code = " + response.getCode());
    System.out.println("Content Type: " + response.getHeader("Content-Type"));
    System.out.println("Content Length: "
                       + response.getHeader("Content-Length"));
    System.out.println("Content:");
    String content = new String(response.getContent());
    System.out.println(content);
  }
}